MINIMIZING THE COST FUNCTION: GRADIENT DESCENT¶

image.png

HEMANT THAPA¶

In [1]:
import pandas as pd 
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from matplotlib.animation import FuncAnimation
import statistics as st
import yfinance as yf
import tensorflow as tf
import PIL
import math
import warnings
warnings.filterwarnings("ignore")
2023-08-22 12:56:30.853827: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
In [2]:
from IPython.display import HTML
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout
from keras.callbacks import EarlyStopping
from tensorflow.keras import regularizers 
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
In [3]:
class Stock:
    def __init__(self, ticker, period):
        self.ticker = ticker
        self.period = period
        
    def chart(self):
        return yf.download(self.ticker.upper(), period=self.period)
In [4]:
hsbc = Stock("HSBA.L", "5Y").chart()
lloyds = Stock("LLOY.L", "5Y").chart()
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
In [5]:
class StandardScale:
    def __init__ (self, data):
        self.data = data
    
    def scale_fit(self):
        return (self.data - self.data.mean())/self.data.std()

CONVERTING PENCE INTO POUND¶

In [6]:
hsbc['Close'] = hsbc['Close']/100
In [7]:
hsbc[:5]
Out[7]:
Open High Low Close Adj Close Volume
Date
2018-08-22 691.000000 694.000000 689.099976 6.893 533.428467 28618071
2018-08-23 689.400024 690.099976 685.099976 6.882 532.577209 20936281
2018-08-24 687.099976 688.599976 684.299988 6.862 531.029602 17636211
2018-08-28 694.200012 695.900024 685.900024 6.906 534.434570 21250214
2018-08-29 693.099976 694.599976 683.400024 6.849 530.023560 21954676
In [8]:
hsbc = StandardScale(hsbc).scale_fit()
In [9]:
hsbc[:5]
Out[9]:
Open High Low Close Adj Close Volume
Date
2018-08-22 1.661454 1.644679 1.689540 1.647340 0.919598 -0.116000
2018-08-23 1.646161 1.607335 1.651309 1.636796 0.909903 -0.543748
2018-08-24 1.624175 1.592972 1.643663 1.617627 0.892277 -0.727507
2018-08-28 1.692043 1.662873 1.658956 1.659800 0.931057 -0.526267
2018-08-29 1.681528 1.650424 1.635061 1.605167 0.880818 -0.487040
In [10]:
lloyds['Close'] = lloyds['Close']/100
In [11]:
lloyds[:5]
Out[11]:
Open High Low Close Adj Close Volume
Date
2018-08-22 60.540001 61.320000 60.540001 0.6100 46.861271 175839001
2018-08-23 60.950001 60.980000 60.380001 0.6076 46.676895 230949635
2018-08-24 60.580002 61.220001 60.360001 0.6084 46.738354 145231507
2018-08-28 61.259998 61.639999 60.630001 0.6079 46.699947 128154783
2018-08-29 60.770000 60.919998 59.880001 0.6046 46.446430 198641536
In [12]:
lloyds = StandardScale(lloyds).scale_fit()
In [13]:
lloyds[:5]
Out[13]:
Open High Low Close Adj Close Volume
Date
2018-08-22 1.395586 1.414171 1.459984 1.448087 0.838523 -0.336483
2018-08-23 1.437804 1.379009 1.443520 1.423309 0.813155 0.192040
2018-08-24 1.399705 1.403830 1.441462 1.431568 0.821611 -0.630015
2018-08-28 1.469725 1.447266 1.469245 1.426406 0.816327 -0.793785
2018-08-29 1.419269 1.372803 1.392069 1.392336 0.781447 -0.117802
In [14]:
X = hsbc.Close.values.reshape(-1,1).astype('float')
In [15]:
y = lloyds.Close.values.reshape(-1,1).astype('float')
In [16]:
X.shape
Out[16]:
(1262, 1)
In [17]:
X = X[:1260]
In [18]:
X.shape
Out[18]:
(1260, 1)
In [19]:
y.shape
Out[19]:
(1260, 1)
In [20]:
X_test, X_train, y_test, y_train = train_test_split(X,y, test_size=0.2)
In [21]:
print(X_test.shape)
print(y_test.shape)
(1008, 1)
(1008, 1)
In [22]:
print(X_train.shape)
print(y_train.shape)
(252, 1)
(252, 1)
In [23]:
model = LinearRegression().fit(X_train, y_train)
model
Out[23]:
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LinearRegression()
In [24]:
y_pred = model.predict(X_test)
In [25]:
plt.figure(figsize=(10,5))
plt.scatter(X_train, y_train, s=20)
plt.plot(X_test, y_pred, color="red")
plt.xlabel('LLOYDS BANKS')
plt.ylabel('HSBC HOLDINGS')
plt.title('Correaltion between LLOYDS & HSBC')
plt.grid(True, linestyle="--", color="blue", alpha=0.4)
plt.show()
In [26]:
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r_square = r2_score(y_test, y_pred)

print("Mean Absolute Error:", mae)
print("Mean Squared Error:", mse)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r_square)
Mean Absolute Error: 0.45636300621391074
Mean Squared Error: 0.31258313014377354
Root Mean Squared Error: 0.55909134329175
R-squared: 0.6796129217150708

image.png

image.png

Gradient descent is an efficient optimization algorithm that attempts to find a local or global minimum of the cost function.¶

image-2.png

  1. A local minimum is a point where our function is lower than all neighboring points. It is not possible to decrease the value of the cost function by making infinitesimal steps.

  2. A global minimum is a point that obtains the absolute lowest value of our function, but global minima are difficult to compute in practice.

In [27]:
class GradientDescent:
    def __init__(self, x, y, m_curr=0, c_curr=0, iteration=500, rate=0.01):
        self.x = x
        self.y = y
        self.predicted_y = (m_curr * x) + c_curr  # Initialize predicted_y using initial slope and intercept
        self.m_curr = m_curr
        self.c_curr = c_curr
        self.iteration = iteration
        self.rate = rate
    def cost_function(self):
        N = len(self.y)
         #mean squared error
        return sum((self.y - self.predicted_y) ** 2) / N
    
    def calculation(self):
        N = float(len(self.y))
        gradient_descent = pd.DataFrame(columns=['m_curr', 'c_curr', 'cost'])
        # Perform gradient descent iterations
        for i in range(self.iteration):
            # Calculate the predicted y values using current slope and intercept
            self.predicted_y = (self.m_curr * self.x) + self.c_curr
            cost = self.cost_function()
             # Calculate gradients for slope (m_grad) and intercept (c_grad)
            m_gradient = -(2/N) * np.sum(self.x * (self.y - self.predicted_y))
            c_gradient = -(2/N) * np.sum(self.y - self.predicted_y)
            # Update the slope and intercept using gradient and learning rate
            self.m_curr -= self.rate * m_gradient
            self.c_curr -= self.rate * c_gradient
            
            gradient_descent.loc[i] = [self.m_curr, self.c_curr, cost]
            
        return gradient_descent
In [28]:
gd = GradientDescent(X_train, y_train).calculation()
In [29]:
gd['cost'] = gd['cost'].explode()
In [30]:
plt.figure(figsize=(10,5))
gd.cost.plot(color="red", linestyle="--")
plt.scatter(gd.index[-1:], gd.cost[-1:], s=30, color="grey")
plt.scatter(gd.index[0:1], gd.cost[0:1], s=30, color="grey")
plt.ylabel('Cost')
plt.xlabel('Iteration')
plt.title('Gradient descent')
plt.grid(True, linestyle="--", color="blue", alpha=0.4)
plt.show()
In [31]:
fig = plt.figure()
fig, ax = plt.subplots(figsize=(15, 6))
plt.ylabel("LLOYDS", fontsize=16)
plt.xlabel("HSBC", fontsize=16)
plt.title('Linear Regression', fontsize=16)
plt.grid(True, linestyle="--")
plt.scatter(X_train, y_train, color='gray', s=40)
line, = ax.plot([], [], lw=2)
annotation = ax.annotate('', xy=(0.5, 0.95), xycoords='axes fraction', fontsize=10)

def init():
    line.set_data([], [])
    annotation.set_text('')
    return line, annotation

def animate(i):
    m_curr, c_curr, cost = gd.loc[i]  
    x_vals = X_test
    y_pred = m_curr * x_vals + c_curr
    line.set_data(x_vals, y_pred)
    annotation.set_text('Cost = %.2f' % cost)
    return line, annotation

anim = animation.FuncAnimation(fig, animate, init_func=init, frames=len(gd), interval=40, blit=True)

HTML(anim.to_jshtml())
Animation size has reached 21020100 bytes, exceeding the limit of 20971520.0. If you're sure you want a larger animation embedded, set the animation.embed_limit rc parameter to a larger value (in MB). This and further frames will be dropped.
Out[31]:
<Figure size 640x480 with 0 Axes>
In [46]:
# anim = FuncAnimation(fig, animate, init_func=init, frames=len(gd), interval=40, blit=True)
# anim.save('linear_regression_animation.gif', writer='pillow')
In [33]:
X.shape
Out[33]:
(1260, 1)
In [34]:
y.shape
Out[34]:
(1260, 1)
In [35]:
X_train_temp, X_test, y_train_temp, y_test = train_test_split(X, y, test_size=0.2, random_state=False)
X_train, X_val, y_train, y_val = train_test_split(X_train_temp, y_train_temp, test_size=0.2, random_state=False)
In [36]:
model = Sequential([
    Dense(units=1, input_shape=(1,), activation='linear', use_bias=True)
])
In [37]:
model.compile(optimizer='sgd', loss='mean_squared_error')
In [38]:
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100, verbose=0)
In [39]:
y_pred = model.predict(X_train)
y_pred[:5]
26/26 [==============================] - 0s 2ms/step
Out[39]:
array([[ 0.27088347],
       [-1.0405173 ],
       [-0.57843864],
       [-1.1212921 ],
       [-0.913812  ]], dtype=float32)
In [40]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
In [41]:
mae = mean_absolute_error(y_train, y_pred)
mse = mean_squared_error(y_train, y_pred)
rmse = np.sqrt(mse)
r_square = r2_score(y_train, y_pred)

print("Mean Absolute Error:", mae)
print("Mean Squared Error:", mse)
print("Root Mean Squared Error:", rmse)
print("R-squared:", r_square)
Mean Absolute Error: 0.464512559263437
Mean Squared Error: 0.32534344680901806
Root Mean Squared Error: 0.5703888557896429
R-squared: 0.6721295551316442
In [42]:
y_pred.shape
Out[42]:
(806, 1)
In [43]:
X_train.shape
Out[43]:
(806, 1)
In [44]:
plt.figure(figsize=(10, 6))
plt.plot(X_train, y_pred, color='red', label='Tensor Flow', linewidth=2)
sns.scatterplot(x=X_train_temp[:, 0].flatten(), y=y_train_temp.flatten(), color='black', label='Data Points', s=40)
plt.grid(True, linestyle="--", alpha=0.5)
plt.xlabel("HSBC")
plt.ylabel("LLOYDS")
plt.title("Correlation between HSBC & LLOYDS")
plt.legend()
plt.show()
In [45]:
plt.figure(figsize=(10, 6))
plt.plot(history.history['loss'], label='Training Loss', linewidth=3)
plt.plot(history.history['val_loss'], label='Validation Loss', linewidth=3, linestyle="--")
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True, linestyle="--", alpha=0.5)
plt.show()

REFERENCES:¶

Towards Data Science. (Year, Month Day). Minimizing the Cost Function: Gradient Descent. Towards Data Science. https://towardsdatascience.com/minimizing-the-cost-function-gradient-descent-a5dd6b5350e1

Study.com. (Year). Average Cost Function: Formula & Examples. Study.com. https://study.com/learn/lesson/average-cost-function-formula-examples.html